# dataset
library(PCAWG7)
# PCA plotting tool
library(factoextra)
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(tibble)
library(tidyr)
# heatmap
library(gplots)
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
library(ggfortify)
# for dataframe manipulation
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# for tsne computation
library(Rtsne)
# for umap computation
library(umap)
# General plotting tool
library(ggplot2)
#
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# for 3D pca plot
library(rgl)
# for tsne computation
library(Rtsne)
# for umap computation
library(umap)
#library(rsvd)
all_exposure<- as.data.frame(t(PCAWG7::exposure$PCAWG$SBS96))
aa <- PCAWG7::exposure.stats$PCAWG$SBS96
bb <- lapply(aa, rownames)
enframe(bb) %>%
unnest(value)
## # A tibble: 357 × 2
## name value
## <chr> <chr>
## 1 Biliary-AdenoCA SBS1
## 2 Biliary-AdenoCA SBS2
## 3 Biliary-AdenoCA SBS3
## 4 Biliary-AdenoCA SBS5
## 5 Biliary-AdenoCA SBS9
## 6 Biliary-AdenoCA SBS12
## 7 Biliary-AdenoCA SBS13
## 8 Biliary-AdenoCA SBS15
## 9 Biliary-AdenoCA SBS17a
## 10 Biliary-AdenoCA SBS17b
## # … with 347 more rows
data.frame(lapply(bb, "length<-", max(lengths(bb))))
## Biliary.AdenoCA Bladder.TCC Bone.Benign Bone.Epith Bone.Osteosarc
## 1 SBS1 SBS1 SBS1 SBS1 SBS1
## 2 SBS2 SBS2 SBS2 SBS2 SBS2
## 3 SBS3 SBS5 SBS5 SBS5 SBS3
## 4 SBS5 SBS8 SBS8 SBS13 SBS5
## 5 SBS9 SBS13 SBS13 SBS17a SBS8
## 6 SBS12 SBS29 SBS40 SBS17b SBS13
## 7 SBS13 SBS40 SBS51 SBS40 SBS17a
## 8 SBS15 <NA> SBS60 <NA> SBS17b
## 9 SBS17a <NA> <NA> <NA> SBS30
## 10 SBS17b <NA> <NA> <NA> SBS40
## 11 SBS18 <NA> <NA> <NA> <NA>
## 12 SBS21 <NA> <NA> <NA> <NA>
## 13 SBS22 <NA> <NA> <NA> <NA>
## 14 SBS24 <NA> <NA> <NA> <NA>
## 15 SBS32 <NA> <NA> <NA> <NA>
## 16 SBS40 <NA> <NA> <NA> <NA>
## 17 SBS44 <NA> <NA> <NA> <NA>
## 18 <NA> <NA> <NA> <NA> <NA>
## 19 <NA> <NA> <NA> <NA> <NA>
## 20 <NA> <NA> <NA> <NA> <NA>
## 21 <NA> <NA> <NA> <NA> <NA>
## 22 <NA> <NA> <NA> <NA> <NA>
## 23 <NA> <NA> <NA> <NA> <NA>
## 24 <NA> <NA> <NA> <NA> <NA>
## Breast.AdenoCA Breast.DCIS Breast.LobularCA Cervix.AdenoCA Cervix.SCC
## 1 SBS1 SBS1 SBS1 SBS1 SBS1
## 2 SBS2 SBS5 SBS2 SBS2 SBS2
## 3 SBS3 SBS40 SBS3 SBS5 SBS5
## 4 SBS5 <NA> SBS5 SBS13 SBS13
## 5 SBS8 <NA> SBS13 <NA> SBS18
## 6 SBS9 <NA> SBS18 <NA> SBS40
## 7 SBS13 <NA> <NA> <NA> <NA>
## 8 SBS17a <NA> <NA> <NA> <NA>
## 9 SBS17b <NA> <NA> <NA> <NA>
## 10 SBS18 <NA> <NA> <NA> <NA>
## 11 SBS37 <NA> <NA> <NA> <NA>
## 12 SBS40 <NA> <NA> <NA> <NA>
## 13 SBS41 <NA> <NA> <NA> <NA>
## 14 <NA> <NA> <NA> <NA> <NA>
## 15 <NA> <NA> <NA> <NA> <NA>
## 16 <NA> <NA> <NA> <NA> <NA>
## 17 <NA> <NA> <NA> <NA> <NA>
## 18 <NA> <NA> <NA> <NA> <NA>
## 19 <NA> <NA> <NA> <NA> <NA>
## 20 <NA> <NA> <NA> <NA> <NA>
## 21 <NA> <NA> <NA> <NA> <NA>
## 22 <NA> <NA> <NA> <NA> <NA>
## 23 <NA> <NA> <NA> <NA> <NA>
## 24 <NA> <NA> <NA> <NA> <NA>
## CNS.GBM CNS.Medullo CNS.Oligo CNS.PiloAstro ColoRect.AdenoCA Eso.AdenoCA
## 1 SBS1 SBS1 SBS1 SBS1 SBS1 SBS1
## 2 SBS5 SBS5 SBS5 SBS5 SBS5 SBS2
## 3 SBS11 SBS8 SBS8 SBS19 SBS10a SBS3
## 4 SBS30 SBS18 SBS40 SBS23 SBS10b SBS5
## 5 SBS37 SBS39 <NA> SBS40 SBS15 SBS13
## 6 SBS40 SBS40 <NA> <NA> SBS17a SBS17a
## 7 <NA> SBS60 <NA> <NA> SBS17b SBS17b
## 8 <NA> <NA> <NA> <NA> SBS18 SBS18
## 9 <NA> <NA> <NA> <NA> SBS28 SBS28
## 10 <NA> <NA> <NA> <NA> SBS37 SBS40
## 11 <NA> <NA> <NA> <NA> SBS40 <NA>
## 12 <NA> <NA> <NA> <NA> SBS44 <NA>
## 13 <NA> <NA> <NA> <NA> SBS45 <NA>
## 14 <NA> <NA> <NA> <NA> <NA> <NA>
## 15 <NA> <NA> <NA> <NA> <NA> <NA>
## 16 <NA> <NA> <NA> <NA> <NA> <NA>
## 17 <NA> <NA> <NA> <NA> <NA> <NA>
## 18 <NA> <NA> <NA> <NA> <NA> <NA>
## 19 <NA> <NA> <NA> <NA> <NA> <NA>
## 20 <NA> <NA> <NA> <NA> <NA> <NA>
## 21 <NA> <NA> <NA> <NA> <NA> <NA>
## 22 <NA> <NA> <NA> <NA> <NA> <NA>
## 23 <NA> <NA> <NA> <NA> <NA> <NA>
## 24 <NA> <NA> <NA> <NA> <NA> <NA>
## Head.SCC Kidney.ChRCC Kidney.RCC Liver.HCC Lung.AdenoCA Lung.SCC Lymph.BNHL
## 1 SBS1 SBS1 SBS1 SBS1 SBS1 SBS1 SBS1
## 2 SBS2 SBS2 SBS2 SBS4 SBS2 SBS2 SBS2
## 3 SBS3 SBS5 SBS5 SBS5 SBS3 SBS4 SBS3
## 4 SBS4 SBS13 SBS13 SBS6 SBS4 SBS5 SBS5
## 5 SBS5 SBS17a SBS22 SBS9 SBS5 SBS8 SBS6
## 6 SBS7a SBS17b SBS29 SBS12 SBS9 SBS13 SBS9
## 7 SBS7b SBS29 SBS40 SBS14 SBS13 <NA> SBS13
## 8 SBS7d SBS40 SBS41 SBS16 SBS17a <NA> SBS17a
## 9 SBS13 <NA> <NA> SBS17a SBS17b <NA> SBS17b
## 10 SBS16 <NA> <NA> SBS17b SBS18 <NA> SBS34
## 11 SBS17a <NA> <NA> SBS18 SBS28 <NA> SBS36
## 12 SBS17b <NA> <NA> SBS19 SBS40 <NA> SBS37
## 13 SBS18 <NA> <NA> SBS22 <NA> <NA> SBS40
## 14 SBS33 <NA> <NA> SBS24 <NA> <NA> SBS56
## 15 SBS40 <NA> <NA> SBS26 <NA> <NA> <NA>
## 16 <NA> <NA> <NA> SBS28 <NA> <NA> <NA>
## 17 <NA> <NA> <NA> SBS29 <NA> <NA> <NA>
## 18 <NA> <NA> <NA> SBS30 <NA> <NA> <NA>
## 19 <NA> <NA> <NA> SBS31 <NA> <NA> <NA>
## 20 <NA> <NA> <NA> SBS35 <NA> <NA> <NA>
## 21 <NA> <NA> <NA> SBS40 <NA> <NA> <NA>
## 22 <NA> <NA> <NA> SBS53 <NA> <NA> <NA>
## 23 <NA> <NA> <NA> SBS54 <NA> <NA> <NA>
## 24 <NA> <NA> <NA> SBS56 <NA> <NA> <NA>
## Lymph.CLL Myeloid.AML Myeloid.MDS Myeloid.MPN Ovary.AdenoCA Panc.AdenoCA
## 1 SBS1 SBS1 SBS1 SBS1 SBS1 SBS1
## 2 SBS5 SBS5 SBS5 SBS2 SBS2 SBS2
## 3 SBS9 SBS18 SBS23 SBS5 SBS3 SBS3
## 4 SBS40 SBS31 <NA> SBS19 SBS5 SBS5
## 5 <NA> SBS60 <NA> SBS23 SBS8 SBS6
## 6 <NA> <NA> <NA> SBS32 SBS13 SBS8
## 7 <NA> <NA> <NA> SBS51 SBS18 SBS13
## 8 <NA> <NA> <NA> SBS58 SBS26 SBS17a
## 9 <NA> <NA> <NA> SBS60 SBS35 SBS17b
## 10 <NA> <NA> <NA> <NA> SBS39 SBS18
## 11 <NA> <NA> <NA> <NA> SBS40 SBS20
## 12 <NA> <NA> <NA> <NA> SBS41 SBS26
## 13 <NA> <NA> <NA> <NA> <NA> SBS28
## 14 <NA> <NA> <NA> <NA> <NA> SBS30
## 15 <NA> <NA> <NA> <NA> <NA> SBS40
## 16 <NA> <NA> <NA> <NA> <NA> SBS51
## 17 <NA> <NA> <NA> <NA> <NA> <NA>
## 18 <NA> <NA> <NA> <NA> <NA> <NA>
## 19 <NA> <NA> <NA> <NA> <NA> <NA>
## 20 <NA> <NA> <NA> <NA> <NA> <NA>
## 21 <NA> <NA> <NA> <NA> <NA> <NA>
## 22 <NA> <NA> <NA> <NA> <NA> <NA>
## 23 <NA> <NA> <NA> <NA> <NA> <NA>
## 24 <NA> <NA> <NA> <NA> <NA> <NA>
## Panc.Endocrine Prost.AdenoCA Skin.Melanoma SoftTissue.Leiomyo
## 1 SBS1 SBS1 SBS1 SBS1
## 2 SBS2 SBS2 SBS2 SBS2
## 3 SBS3 SBS3 SBS5 SBS5
## 4 SBS5 SBS5 SBS7a SBS13
## 5 SBS6 SBS8 SBS7b SBS17a
## 6 SBS8 SBS13 SBS7c SBS17b
## 7 SBS9 SBS18 SBS7d SBS30
## 8 SBS11 SBS33 SBS13 SBS40
## 9 SBS13 SBS37 SBS17a <NA>
## 10 SBS26 SBS40 SBS17b <NA>
## 11 SBS30 SBS41 SBS38 <NA>
## 12 SBS36 SBS45 SBS40 <NA>
## 13 SBS39 SBS52 SBS58 <NA>
## 14 <NA> SBS58 <NA> <NA>
## 15 <NA> <NA> <NA> <NA>
## 16 <NA> <NA> <NA> <NA>
## 17 <NA> <NA> <NA> <NA>
## 18 <NA> <NA> <NA> <NA>
## 19 <NA> <NA> <NA> <NA>
## 20 <NA> <NA> <NA> <NA>
## 21 <NA> <NA> <NA> <NA>
## 22 <NA> <NA> <NA> <NA>
## 23 <NA> <NA> <NA> <NA>
## 24 <NA> <NA> <NA> <NA>
## SoftTissue.Liposarc Stomach.AdenoCA Thy.AdenoCA Uterus.AdenoCA
## 1 SBS1 SBS1 SBS1 SBS1
## 2 SBS2 SBS2 SBS2 SBS2
## 3 SBS3 SBS3 SBS5 SBS3
## 4 SBS5 SBS5 SBS13 SBS5
## 5 SBS13 SBS9 SBS40 SBS6
## 6 SBS37 SBS13 SBS58 SBS10a
## 7 SBS40 SBS15 <NA> SBS10b
## 8 <NA> SBS17a <NA> SBS13
## 9 <NA> SBS17b <NA> SBS14
## 10 <NA> SBS18 <NA> SBS15
## 11 <NA> SBS20 <NA> SBS26
## 12 <NA> SBS21 <NA> SBS28
## 13 <NA> SBS26 <NA> SBS40
## 14 <NA> SBS28 <NA> SBS44
## 15 <NA> SBS40 <NA> <NA>
## 16 <NA> SBS41 <NA> <NA>
## 17 <NA> SBS43 <NA> <NA>
## 18 <NA> SBS44 <NA> <NA>
## 19 <NA> SBS51 <NA> <NA>
## 20 <NA> SBS58 <NA> <NA>
## 21 <NA> <NA> <NA> <NA>
## 22 <NA> <NA> <NA> <NA>
## 23 <NA> <NA> <NA> <NA>
## 24 <NA> <NA> <NA> <NA>
all_spectra <- as.data.frame(t(PCAWG7::spectra$PCAWG$SBS96
#PCAWG7::spectra$TCGA$SBS96)))
))
#PCAWG7::spectra$PCAWG$SBS192,
#PCAWG7::spectra$PCAWG$SBS1536,
#PCAWG7::spectra$PCAWG$ID,
#PCAWG7::spectra$PCAWG$DBS78)))
all_spectra <- rownames_to_column(all_spectra,"cancer_type")
# extract cancer type from sample name
for (i in 1:length(all_spectra$cancer_type)){
tmp <- strsplit(all_spectra$cancer_type[i],"::")[[1]][1]
all_spectra$cancer_type[i] <- tmp
}
dim(all_spectra)
## [1] 2780 97
all_spectra_subtypes <- all_spectra
# number of cancer types in the study
count(unique(all_spectra['cancer_type']))
## n
## 1 37
# combine subtypes to bigger type groups
for (i in 1:length(all_spectra$cancer_type)){
tmp <- strsplit(all_spectra$cancer_type[i],"-")[[1]][1]
all_spectra$cancer_type[i] <- tmp
}
# number of cancer subtypes in the study
count(unique(all_spectra['cancer_type']))
## n
## 1 22
PlotSpectraAsSigsWithUncertainty <- function(xx,mean_xx, title = "Mean.as.signature") {
arrow.tops <- apply(xx, 2, max)
arrow.bottoms <- apply(xx, 2, min)
options(digits = 2)
bp <- ICAMS::PlotCatalog(
catalog = mean_xx,
upper = TRUE,
xlabels = TRUE,
cex = 0.8,
ylim = c(min(arrow.bottoms), max(arrow.tops) + 0.005)
)
add.arrows(bp$plot.object, arrow.tops, arrow.bottoms)
#xx$arrow.tops <- arrow.tops
#xx$arrow.bottoms <- arrow.bottoms
return(invisible(xx))
}
add.arrows <- function(bp, tops, bottoms) {
oldopt <- getOption("warn")
on.exit(options(warn = oldopt))
options(warn = -1) # Does not seem to turn off warnings
which0 <- which((tops - bottoms) == 0)
tops[which0] <- tops[which0] + 1e-5
suppressWarnings(
# Necessary because generates warnings for 0-length arrows
arrows(
x0 = bp,
y0 = tops, # location of up arrow tips
y1 = bottoms, # location of down arrow tips
angle = 90, # use "T" arrow heads
code = 3, # use arrow heads at both ends
length = 0.025 # width of arrow heads
))
}
this_spectra <- all_spectra[(all_spectra$cancer_type =="Liver"),-1]
avg.sig <- as.matrix(colMeans(this_spectra[,]))
colnames(avg.sig) <- "Liver"
par(oma = c(1.5,3,1.5,1.5))
#ICAMS::PlotCatalog(avg.sig)
PlotSpectraAsSigsWithUncertainty(this_spectra,avg.sig, title = "Mean.as.signature")
this_spectra <- all_spectra[(all_spectra$cancer_type =="Lung"),-1]
avg.sig <- as.matrix(colMeans(this_spectra[,]))
colnames(avg.sig) <- "Lung"
par(oma = c(1.5,3,1.5,1.5))
#ICAMS::PlotCatalog(avg.sig)
PlotSpectraAsSigsWithUncertainty(this_spectra,avg.sig, title = "Mean.as.signature")
this_spectra <- all_spectra[(all_spectra$cancer_type =="Skin"),-1]
avg.sig <- as.matrix(colMeans(this_spectra[,]))
colnames(avg.sig) <- "Skin"
par(oma = c(1.5,3,1.5,1.5))
#ICAMS::PlotCatalog(avg.sig)
PlotSpectraAsSigsWithUncertainty(this_spectra,avg.sig, title = "Mean.as.signature")
this_spectra <- all_spectra[(all_spectra$cancer_type =="ColoRect"),-1]
avg.sig <- as.matrix(colMeans(this_spectra[,]))
colnames(avg.sig) <- "ColoRect"
par(oma = c(1.5,3,1.5,1.5))
#ICAMS::PlotCatalog(avg.sig)
PlotSpectraAsSigsWithUncertainty(this_spectra,avg.sig, title = "Mean.as.signature")
this_spectra <- all_spectra[(all_spectra$cancer_type =="Lung"),-1]
avg.sig <- as.matrix(colMeans(this_spectra[,]))
colnames(avg.sig) <- "Lung"
par(oma = c(1.5,3,1.5,1.5))
ICAMS::PlotCatalog(avg.sig)
this_spectra <- all_spectra[(all_spectra$cancer_type =="Skin"),]
avg.sig <- as.matrix(colMeans(this_spectra[,-1]))
colnames(avg.sig) <- "Skin"
par(oma = c(1.5,3,1.5,1.5))
ICAMS::PlotCatalog(avg.sig)
this_spectra <- all_spectra[(all_spectra$cancer_type =="ColoRect"),]
avg.sig <- as.matrix(colMeans(this_spectra[,-1]))
colnames(avg.sig) <- "ColoRect"
par(oma = c(1.5,3,1.5,1.5))
ICAMS::PlotCatalog(avg.sig)
pca_res <- prcomp(all_spectra[,-1], center = TRUE, scale. = TRUE)
autoplot(pca_res,data = all_spectra[,], colour = 'cancer_type')
fviz_pca_ind(pca_res,
#addEllipses=TRUE, ellipse.level=0.95,
habillage=all_spectra[, "cancer_type"])
sc_all_spectra <- NULL
for (i in 0:5)
sc_all_spectra <- cbind(sc_all_spectra,as.matrix(rowMeans(all_spectra[,(2+i*16):(17+i*16)])))
sc_all_spectra <- as.data.frame(sc_all_spectra)
sc_all_spectra <- cbind(all_spectra[,1], sc_all_spectra)
colnames(sc_all_spectra) <- c("cancer_type","C>A","C>G","C>T","T>A","T>C","T>G")
sc_all_spectra <- as.data.frame(sc_all_spectra, stringsAsFactors = FALSE)
sc_all_spectra_split <- split(sc_all_spectra,sc_all_spectra$cancer_type)
sc_all_spectra_split <- lapply(sc_all_spectra_split, function(x){x[,-1, drop=F]})
colmean_split <- lapply(sc_all_spectra_split, colMeans)
sum_table <- do.call(cbind,colmean_split)
View(as.data.frame(sort(colSums(sum_table))))
#write_csv(as.data.frame(sum_table), "sum_table.csv")
gplots::heatmap.2(#x = t(sum_table[,-c(7, 12,18, 22)]),
x=sum_table,
scale = "column",
#scale ="row",
dendrogram = "none",
col = "heat.colors",
key = T,
keysize = 2,
margins = c(5, 5),
cex.axis = 1,
symm = FALSE,
trace = "none")
pca_res <- prcomp(all_spectra[-c(668,699, 2735),-1], center = TRUE, scale. = TRUE)
autoplot(pca_res,data = all_spectra[-c(668,699, 2735),], colour = 'cancer_type')
my_data <- all_spectra[-c(668,699, 2735),]
#write.csv(my_data, file = "my_data.csv")
# plotting with cancer type and sample ID annotated
fviz_pca_ind(pca_res,
#addEllipses=TRUE, ellipse.level=0.95,
habillage=all_spectra[-c(668,699,2735), "cancer_type"])
gg_color_hue <- function(n) {
hues = seq(15, 375, length = n + 1)
hcl(h = hues, l = 65, c = 100)[1:n]
}
components <- pca_res[["x"]]
components <- data.frame(components)
components$PC2 <- -components$PC2
components$PC3 <- -components$PC3
components = cbind(components, all_spectra[-c(668,699, 2735),1])
tot_explained_variance_ratio <- summary(pca_res)[["importance"]]['Proportion of Variance',]
tot_explained_variance_ratio <- 100 * sum(tot_explained_variance_ratio)
fig <- plot_ly(components, x = ~PC1, y = ~PC2, z = ~PC3, color = ~all_spectra[-c(668,699, 2735),1], colors = gg_color_hue(22)) %>%
add_markers(size = 12)
fig <- fig %>%
layout(
title = "3D plot after PCA"
#scene = list(bgcolor = "#e5ecf6")
)
fig
scores = as.data.frame(pca_res$x)
plot3d(scores[,1:3],
size=8,
#col = seq(nrow(scores))
col = as.numeric(factor(all_spectra$cancer_type)))
# text3d(scores[,1:3],
# texts=c(rownames(scores)),
# cex= 0.7, pos=3)
fviz_eig(pca_res, addlabels = TRUE)
# Some general exploration ## plot point quality
fviz_pca_ind(pca_res,
axes = c(1,2),
col.ind = "cos2", # Color by the quality of representation
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = FALSE # Avoid text overlapping
)
fviz_pca_ind(pca_res,
axes = c(1,2),
col.ind = "contrib", # Color by the quality of representation
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = FALSE # Avoid text overlapping
)
# plotting the samples with top 100 contribution
fviz_pca_ind(pca_res, select.ind = list(contrib = 100))
fviz_pca_ind(pca_res,
addEllipses=TRUE, ellipse.level=0.95,
habillage=all_spectra[-c(668,699,2735), "cancer_type"],
geom = "point")
fviz_pca_var(pca_res, select.var = list(contrib = 10), repel = TRUE)
# plot the top one contributing variable
fviz_pca_biplot(pca_res, label="var",
select.var = list(contrib = 1),
habillage=all_spectra[-c(668,699,2735), "cancer_type"],
repel = TRUE)
fviz_contrib(pca_res, choice = "var", axes = 1, top = 96)
# + theme(axis.text = element_text(size = 4.5))
# top 1000 drop to 0.05% contribution
test <- fviz_contrib(pca_res, choice = "var", axes = 1)
ts.names <- rownames(test$data)
ts <- test$data[,"contrib"]
table(ts>100/96)
##
## FALSE TRUE
## 45 51
sorted_index <- order(-ts)
bp <- barplot(ts,
#border = NA,
width = 2,
col = c(rep("blue",16),
rep("yellow",16),
rep("red",16),
rep("grey",16),
rep("green",16),
rep("pink",16)),
las = 2,
names.arg = names(ts),
space=0,
cex.axis = 1,
# Adjust when necessary
ylim= c(0,2.5),
ylab = "Variable Contribution to PC1")
top_n <- 10
# Adjust when necessary
text(bp[sorted_index[1:top_n]], ts[sorted_index[1:top_n]]+0.15, labels=ts.names[sorted_index[1:top_n]], srt=90, adj = c(0.5,1),xpd = TRUE, pos=3, cex =0.8)
# abline(h = 0.15, col = "#69b3a2", lty=3)
#mtext(names(pca_res$rotation[,2])[1:6], side = 1, line =2, srt=90, cex=1)
legend("top", legend = c("C>A","C>G","C>T","T>A","T>C","T>G"), col = c("blue","yellow","red","grey","green","pink"),
bty = "n", pch=20 , pt.cex = 2, cex = 0.8, horiz = T, inset = c(0.05, 0.0005))
fviz_contrib(pca_res, choice = "var", axes = 2, top = 10)
test <- fviz_contrib(pca_res, choice = "var", axes = 2)
ts.names <- rownames(test$data)
ts <- test$data[,"contrib"]
table(ts>100/96)
##
## FALSE TRUE
## 52 44
sorted_index <- order(-ts)
bp <- barplot(ts,
#border = NA,
width = 2,
col = c(rep("blue",16),
rep("yellow",16),
rep("red",16),
rep("grey",16),
rep("green",16),
rep("pink",16)),
las = 2,
names.arg = names(ts),
space=0,
cex.axis = 1,
# Adjust when necessary
ylim= c(0,4),
ylab = "Variable Contribution to PC1")
top_n <- 10
# Adjust when necessary
text(bp[sorted_index[1:top_n]], ts[sorted_index[1:top_n]]+0.15, labels=ts.names[sorted_index[1:top_n]], srt=90, adj = c(0.5,1),xpd = TRUE, pos=3, cex =0.8)
# abline(h = 0.15, col = "#69b3a2", lty=3)
#mtext(names(pca_res$rotation[,2])[1:6], side = 1, line =2, srt=90, cex=1)
legend("top", legend = c("C>A","C>G","C>T","T>A","T>C","T>G"), col = c("blue","yellow","red","grey","green","pink"),
bty = "n", pch=20 , pt.cex = 2, cex = 0.8, horiz = T, inset = c(0.05, 0.0005))
pc_load <- pca_res$rotation[,1]
sorted_index <- order(-abs(pca_res$rotation[,1]))
bp <- barplot(pca_res$rotation[,1], col = c(rep("blue",16),
rep("yellow",16),
rep("red",16),
rep("grey",16),
rep("green",16),
rep("pink",16)),
las = 2,
names.arg = names(pca_res$rotation[,1]),
cex.axis = 1,
ylim = c(-0.15, 0.03),
space = 0,
# width = 8,
ylab = "PC1 Loadings")
legend("top", legend = c("C>A","C>G","C>T","T>A","T>C","T>G"), col = c("blue","yellow","red","grey","green","pink"),
bty = "n", pch=20 , pt.cex = 2, cex = 0.8, horiz = TRUE, inset = c(0.05, 0.05))
top_n <- 5
text(bp[sorted_index[1:top_n]], pc_load[sorted_index[1:top_n]]-0.015, labels=names(pc_load[sorted_index[1:top_n]]), srt=90,
#adj = c(0.5,1),
xpd = T, pos=3, cex =0.8)
#abline(h = -0.12, col = "#69b3a2", lty=3)
#text(bp[32], y = -1.2, labels = "y = -1.2", col= "#69b3a2")
# legend("top", legend = c("C>A","C>G","C>T","T>A","T>C","T>G"), col = c("blue","yellow","red","grey","green","pink"), horiz = TRUE, x.intersp = 1.5, y.intersp = 1.5, inset = c(0, -0.1), y = 1.1)
pc_load <- pca_res$rotation[,2]
sorted_index <- order(-abs(pca_res$rotation[,2]))
bp <- barplot(pc_load,
#border = NA,
width = 2,
col = c(rep("blue",16),
rep("yellow",16),
rep("red",16),
rep("grey",16),
rep("green",16),
rep("pink",16)),
las = 2,
names.arg = names(pca_res$rotation[,2]),
space=0,
cex.axis = 1,
ylim= c(-0.2,0.2),
ylab = "PC2 Loadings")
top_n <- 5
text(bp[sorted_index[1:top_n]],
#pc_load[sorted_index[1:top_n]]+0.015,
ifelse(pc_load[sorted_index[1:top_n]]>0,pc_load[sorted_index[1:top_n]]+0.015,pc_load[sorted_index[1:top_n]]-0.025),
labels=names(pc_load[sorted_index[1:top_n]]), srt=90, adj = c(0.5,1),xpd = TRUE, pos=3, cex =0.8)
abline(h = 0.15, col = "#69b3a2", lty=3)
abline(h = -0.15, col = "#69b3a2", lty = 3)
#mtext(names(pca_res$rotation[,2])[1:6], side = 1, line =2, srt=90, cex=1)
legend("topright", legend = c("C>A","C>G","C>T","T>A","T>C","T>G"), col = c("blue","yellow","red","grey","green","pink"),
bty = "n", pch=20 , pt.cex = 2, cex = 0.8, horiz = FALSE, inset = c(0.05, 0.12), yjust = 1.2)
## Loadings for PC3
pc_load <- pca_res$rotation[,3]
sorted_index <- order(-abs(pca_res$rotation[,3]))
bp <- barplot(pc_load,
#border = NA,
width = 2,
col = c(rep("blue",16),
rep("yellow",16),
rep("red",16),
rep("grey",16),
rep("green",16),
rep("pink",16)),
las = 2,
names.arg = names(pca_res$rotation[,2]),
space=0,
cex.axis = 1,
ylim= c(-0.2,0.35),
ylab = "PC3 Loadings")
top_n <- 5
text(bp[sorted_index[1:top_n]],
#pc_load[sorted_index[1:top_n]]+0.015,
ifelse(pc_load[sorted_index[1:top_n]]>0,pc_load[sorted_index[1:top_n]]+0.015,pc_load[sorted_index[1:top_n]]-0.025),
labels=names(pc_load[sorted_index[1:top_n]]), srt=90, adj = c(0.5,1),xpd = TRUE, pos=3, cex =0.8)
#abline(h = 0.15, col = "#69b3a2", lty=3)
#abline(h = -0.15, col = "#69b3a2", lty = 3)
#mtext(names(pca_res$rotation[,2])[1:6], side = 1, line =2, srt=90, cex=1)
legend("topright", legend = c("C>A","C>G","C>T","T>A","T>C","T>G"), col = c("blue","yellow","red","grey","green","pink"),
bty = "n", pch=20 , pt.cex = 2, cex = 0.8, horiz = FALSE, inset = c(0.05, 0.12), yjust = 1.2)
# Comparison with tsne
tsne_res <- Rtsne(all_spectra[-c(668,699),-1])
# Conversion of matrix to dataframe
tsne_plot <- data.frame(x = tsne_res$Y[,1],
y = tsne_res$Y[,2], colour = all_spectra[-c(668,699),"cancer_type"])
# Plotting the plot using ggplot() function
#ggplot2::ggplot(tsne_plot,label=Species)+ geom_point(aes(x=x,y=y))
ggplot(tsne_plot, aes(x, y, colour = colour)) +
geom_point()
umap_res = umap(all_spectra[-c(668,699),-1], n_components = 2, k = 10)
layout <- umap_res[["layout"]]
layout <- data.frame(layout)
final <- cbind(layout, all_spectra[-c(668,699),1])
colnames(final) <- c('X1', 'X2', 'cancer_type')
# fig <- plot_ly(final, x = ~X1, y = ~X2, split = ~cancer_type, type = 'scatter', mode = 'markers')%>%
# layout(
# plot_bgcolor = "#e5ecf6",
# legend=list(title=list(text='cancer_type')),
# xaxis = list(
# title = "0"),
# yaxis = list(
# title = "1"))
# fig
final %>%
ggplot(aes(x = X1,
y = X2,
color = cancer_type))+
geom_point()+
labs(x = "UMAP1",
y = "UMAP2",
subtitle = "UMAP plot")